Data on the geographic distribution of COVID-19 cases worldwide
Libraries
# install calmap
! pip install calmap
# essential libraries
import json
import random
from urllib.request import urlopen
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# storing and analysis
import numpy as np
import pandas as pd
# visualization
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objs as go
import plotly.figure_factory as ff
import calmap
#import folium
# color palette (hex codes, one per case category)
cnf = '#393e46' # confirmed - dark grey
dth = '#ff2e63' # death - red
rec = '#21bf73' # recovered - green
act = '#fe9801' # active case - orange
# converter: register pandas' formatters/converters with matplotlib
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
# hide warnings
# NOTE(review): 'ignore' with no category silences every warning, including
# deprecation warnings from the plotting/data libraries used below.
import warnings
warnings.filterwarnings('ignore')
# html embedding
from IPython.display import Javascript
from IPython.core.display import display
from IPython.core.display import HTML
# Load the 2020-03-21 snapshot of the worldwide COVID-19 distribution data.
# The file name (including its spelling) has to match the CSV on disk.
covid_csv_path = 'COVID-19-geographic-disbtribution-worldwide-2020-03-21.csv'
dfCovid = pd.read_csv(covid_csv_path)
# Preview the first three records.
dfCovid.head(3)
Preprocessing: Cleaning Data
# Count columns used throughout the analysis.
cases = ['Cases', 'Deaths', 'Active']

# Fill missing counts before deriving 'Active'; otherwise a missing value in
# either column would turn 'Active' into NaN and then into 0 below.
dfCovid[['Cases', 'Deaths']] = dfCovid[['Cases', 'Deaths']].fillna(0)

# Active cases are approximated as Cases - Deaths; no recovered count is
# subtracted here.
dfCovid['Active'] = dfCovid['Cases'] - dfCovid['Deaths']

# Shorten long country/territory names (exact, whole-value matches only).
country_short_names = {
    'Antigua_and_Barbuda': 'Barbuda',
    'Central_African_Republic': 'CAR',
    'Democratic_Republic_of_the_Congo': 'DRC',
    'United_Republic_of_Tanzania': 'Tanzania',
    'United_States_of_America': 'USA',
    # NOTE(review): this folds the international-conveyance records into Japan.
    'Cases_on_an_international_conveyance_Japan': 'Japan',
    'Saint_Vincent_and_the_Grenadines': 'Grenadines',
    'Bosnia_and_Herzegovina': 'Bosnia',
}
dfCovid['Countries and territories'] = (
    dfCovid['Countries and territories'].replace(country_short_names)
)

# filling missing values
dfCovid[['Countries and territories']] = dfCovid[['Countries and territories']].fillna('')
dfCovid[cases] = dfCovid[cases].fillna(0)
# Group by countries: total cases, deaths and active cases per country/territory.
# The columns are selected with a list; selecting them as a bare tuple
# (['Cases', 'Deaths', 'Active'] without the inner brackets) is rejected by
# current pandas.
temp = (
    dfCovid.groupby(['Countries and territories'])[['Cases', 'Deaths', 'Active']]
    .sum()
    .reset_index()
)
temp.style.background_gradient(cmap='Pastel1')
temp.head()
# Group by Year and Month: totals per calendar month.
# Columns are selected with a list; bare-tuple selection on a GroupBy is
# rejected by current pandas.
temp = (
    dfCovid.groupby(['Year', 'Month'])[['Cases', 'Deaths', 'Active']]
    .sum()
    .reset_index()
)
temp.style.background_gradient(cmap='Pastel1')
# Group by Year: totals per calendar year.
# Columns are selected with a list; bare-tuple selection on a GroupBy is
# rejected by current pandas.
temp = (
    dfCovid.groupby('Year')[['Cases', 'Deaths', 'Active']]
    .sum()
    .reset_index()
)
temp.style.background_gradient(cmap='Pastel1')
# Distribution of the yearly case totals.
# NOTE(review): seaborn marks `distplot` as deprecated; `histplot`/`displot`
# are the replacements once the installed seaborn version is confirmed.
sns.distplot(temp['Cases'])
# Stacked bar chart of reported cases per date, coloured by country/territory.
# Columns are selected with a list; bare-tuple selection on a GroupBy is
# rejected by current pandas.
temp = dfCovid.groupby(['Countries and territories', 'Date'])[['Cases', 'Deaths']].sum()
temp = temp.reset_index()
fig = px.bar(temp, x="Date", y="Cases", color='Countries and territories', orientation='v', height=600,
             title='Confirmed', color_discrete_sequence=px.colors.cyclical.mygbm)
fig.show()
# Treemap of reported cases, one tile per country/territory.
cases_sorted = dfCovid.sort_values(by='Cases', ascending=False).reset_index(drop=True)
fig = px.treemap(
    cases_sorted,
    path=["Countries and territories"],
    values="Cases",
    height=700,
    title='Number of Confirmed Cases',
    color_discrete_sequence=px.colors.qualitative.Prism,
)
# Show label, text and value on each tile.
fig.data[0].textinfo = 'label+text+value'
fig.show()
# Treemap of reported deaths, one tile per country/territory.
deaths_sorted = dfCovid.sort_values(by='Deaths', ascending=False).reset_index(drop=True)
fig = px.treemap(
    deaths_sorted,
    path=["Countries and territories"],
    values="Deaths",
    height=700,
    title='Number of Deaths reported',
    color_discrete_sequence=px.colors.qualitative.Prism,
)
# Show label, text and value on each tile.
fig.data[0].textinfo = 'label+text+value'
fig.show()
# Per-country totals, then a treemap comparing overall active cases and deaths.
# Columns are selected with a list; bare-tuple selection on a GroupBy is
# rejected by current pandas.
temp = dfCovid.groupby('Countries and territories')[['Cases', 'Deaths', 'Active']].sum().reset_index()
temp.style.background_gradient(cmap='Pastel1')
temp.head(3)
# `temp` is keyed by country (it has no 'Date' column after the groupby
# above), so the country column is the identifier when reshaping to long form.
tm = temp.melt(id_vars="Countries and territories", value_vars=['Active', 'Deaths'])
fig = px.treemap(tm, path=["variable"], values="value", height=400, width=600)
fig.show()
Country-wise data: figures for each country
# Copy of the records without the separate Day/Month/Year columns.
dfCovid2 = dfCovid.drop(columns=['Day', 'Month', 'Year'])
dfCovid2.head(3)
Countries with deaths reported
# Load the country reference table (read as Latin-1) and preview it.
df_country = pd.read_csv('Country.csv', encoding='latin1')
df_country.head(5)
# Attach the reference columns to every case record through the shared
# 'GeoId' key (inner join: records without a match are dropped).
result = df_country.merge(dfCovid2, on='GeoId')
result.head()
# Totals per country/territory together with its coordinates.
# Columns are selected with a list; bare-tuple selection on a GroupBy is
# rejected by current pandas.
country = (
    result.groupby(['Countries and territories', 'latitude', 'longitude'])[['Cases', 'Deaths', 'Active']]
    .sum()
    .reset_index()
)
country.style.background_gradient(cmap='Pastel1')
country.head()
# NOTE(review): the totals above are replaced right away by the row-level
# `result` frame. Later cells group `country` by 'Date', so they depend on
# this reassignment; `country` therefore holds one row per record from here on.
country = result
country.head(5)
# World wide
# Interactive world map with one circle per country/territory, sized by its
# total number of reported cases.
# If folium is missing: conda install -c conda-forge folium --yes
import folium
import webbrowser

m = folium.Map(location=[0, 0], tiles='cartodbpositron',
               min_zoom=1, max_zoom=4, zoom_start=1)

# `country` may hold several rows per country (one per reporting date), so
# collapse it to one row of totals per location before drawing; otherwise
# every daily record would get its own circle and tooltip.
country_totals = (
    country.groupby(['Countries and territories', 'latitude', 'longitude'])[['Cases', 'Deaths']]
    .sum()
    .reset_index()
)

for _, row in country_totals.iterrows():
    folium.Circle(
        location=[row['latitude'], row['longitude']],
        color='crimson',
        # <b> is the HTML bold tag (<bold> is not an HTML element).
        tooltip=('<li><b>Countries and territories : </b>' + str(row['Countries and territories'])
                 + '<li><b>Cases : </b>' + str(row['Cases'])
                 + '<li><b>Deaths : </b>' + str(row['Deaths'])),
        # Clamp at zero: a negative count raised to 1.1 would be a complex number.
        radius=max(int(row['Cases']), 0) ** 1.1,
    ).add_to(m)
m
Countries with Confirmed Cases
# Confirmed
# `country` may hold several rows per country (one per reporting date); a
# choropleth needs exactly one value per location, so total the cases first.
country_cases = (
    country.groupby('Countries and territories')[['Cases']]
    .sum()
    .reset_index()
)
fig = px.choropleth(country_cases, locations="Countries and territories",
                    locationmode='country names', color="Cases",
                    hover_name="Countries and territories", range_color=[1, 7000],
                    color_continuous_scale="aggrnyl",
                    title='Countries with Confirmed Cases')
# Hide the colour bar.
fig.update(layout_coloraxis_showscale=False)
fig.show()
Countries with Deaths Reported
# Deaths
# `country` may hold several rows per country (one per reporting date); total
# the deaths per country first so each location has a single value, then keep
# only countries that reported at least one death.
country_deaths = (
    country.groupby('Countries and territories')[['Deaths']]
    .sum()
    .reset_index()
)
fig = px.choropleth(country_deaths[country_deaths['Deaths'] > 0],
                    locations="Countries and territories", locationmode='country names',
                    color="Deaths", hover_name="Countries and territories",
                    range_color=[1, 50], color_continuous_scale="agsunset",
                    title='Countries with Deaths Reported')
# Hide the colour bar.
fig.update(layout_coloraxis_showscale=False)
fig.show()
Spread over time
# Animated bubble map: one frame per date, one bubble per country/territory.
# Columns are selected with a list; bare-tuple selection on a GroupBy is
# rejected by current pandas.
formated_gdf = country.groupby(['Date', 'Countries and territories'])[['Cases', 'Deaths']].max()
formated_gdf = formated_gdf.reset_index()
# NOTE(review): the date strings are parsed with pandas' default inference;
# if the source uses day-first dates, pass dayfirst=True — confirm against the CSV.
formated_gdf['Date'] = pd.to_datetime(formated_gdf['Date'])
# Order rows chronologically while 'Date' is still a datetime: the animation
# frames follow the order in which dates appear, and the groupby above sorted
# them as plain strings.
formated_gdf = formated_gdf.sort_values(['Date', 'Countries and territories'])
formated_gdf['Date'] = formated_gdf['Date'].dt.strftime('%m/%d/%Y')
# Marker size grows sub-linearly with the case count; clip at zero because a
# negative count raised to 0.3 is NaN, which is not a valid marker size.
formated_gdf['size'] = formated_gdf['Cases'].clip(lower=0).pow(0.3)
fig = px.scatter_geo(formated_gdf, locations="Countries and territories", locationmode='country names',
                     color="Cases", size='size', hover_name="Countries and territories",
                     range_color=[0, max(formated_gdf['Cases']) + 2],
                     projection="natural earth", animation_frame="Date",
                     title='Spread over time')
# Hide the colour bar.
fig.update(layout_coloraxis_showscale=False)
fig.show()
Country Wise
# Small multiples: one line chart of cases over time per country/territory.
temp = country.groupby(['Date', 'Countries and territories'])['Cases'].sum()
# NOTE(review): 'Date' is sorted as it is stored; if it holds strings, the
# order is lexicographic rather than chronological — confirm the column type.
temp = temp.reset_index().sort_values(by=['Date', 'Countries and territories'])
# Newer matplotlib releases ship the seaborn style sheet as 'seaborn-v0_8';
# fall back to the old 'seaborn' name when the new one is not available.
seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(seaborn_style)
g = sns.FacetGrid(temp, col="Countries and territories", hue="Countries and territories",
                  sharey=False, col_wrap=5)
g = g.map(plt.plot, "Date", "Cases")
g.set_xticklabels(rotation=90)
plt.show()